Covid Tracking Dataset (w/ hospitalised data)

Source: https://covidtracking.com/ Latest data and plots: https://github.com/bielsnor/futureproof/tree/master/_notebooks/results

Load and Clean the Data

Missing values: Retrieving from other datasets or through merging columns (or both)

The following will be done:

  • Active Cases: Retrieved from JHU dataset and calculating $active = pos-dead-recovered$
  • Beds per State: Retrieved from External Datasets
# TODO 
all_cases['active'] = all_cases['positive'] - all_cases['recovered'] - all_cases['death']
# change location of 'active' column
cols = list(all_cases)
cols.insert(3, cols.pop(cols.index('active')))
all_cases = all_cases.loc[:, cols]
# Load datasets for US population and Hospital beds per 1000
us_population = pd.read_csv('data/us_population.csv')
hosp_beds = pd.read_csv('data/hospital_beds.csv')
state_abbrev = pd.read_csv('data/us_state_names.csv')

# add state abbreviations to us_population and hospital beds dataframe
for state in state_abbrev['State'].tolist():
    # store state abbreviation in variable
    abbrev = state_abbrev.loc[state_abbrev['State'] == state, 'Abbreviation'].tolist()[0]
    # add abbrev to new column 'Abbreviation' in us_population df
    us_population.loc[us_population['State'] == state, 'Abbreviation'] = abbrev
    # add abbrev to new column in hosp_beds df
    hosp_beds.loc[hosp_beds['Location'] == state, 'Abbreviation'] = abbrev
    
# change order of columns of us_population
cols = list(us_population)
cols.insert(2, cols.pop(cols.index('Abbreviation')))
us_population = us_population.loc[:, cols]

# drop unnecessary columns of us_population
us_population = us_population.drop(columns=['rank', 'Growth', 'Pop2018', 'Pop2010', 'growthSince2010', 'Percent', 'density'])

# drop unnecessary columns of hosp_beds
hosp_beds = hosp_beds.drop(columns=['Location', 'State/Local Government', 'Non-Profit', 'For-Profit'])

# change order of columns of hosp_beds
cols = list(hosp_beds)
cols.insert(0, cols.pop(cols.index('Abbreviation')))
hosp_beds = hosp_beds.loc[:, cols]
# filter out non-existing states like 'AS'
all_cases = all_cases[all_cases['state'].isin(state_abbrev['Abbreviation'].tolist())]
# Split dataframes by date
df_split_by_date = dict(tuple(all_cases.groupby('date')))

# Split dataframes by state
df_split_by_state = dict(tuple(all_cases.groupby('state')))
# merge dataframes us_population and all_cases
df_merge_uspop = all_cases.merge(us_population, how='left', left_on='state', right_on='Abbreviation')
df_merge_uspop = df_merge_uspop.drop(columns=['Abbreviation'])
df_merge_uspop = df_merge_uspop.rename(columns={'Pop': 'population'})

# change location of 'population' column
cols = list(df_merge_uspop)
cols.insert(2, cols.pop(cols.index('population')))
df_merge_uspop = df_merge_uspop.loc[:, cols]

# merge dataframes hosp_beds and df_merge_uspop
df_merge_hosp = df_merge_uspop.merge(hosp_beds, how='left', left_on='state', right_on='Abbreviation')
df_merge_hosp = df_merge_hosp.drop(columns=['Abbreviation'])
all_cases = df_merge_hosp.rename(columns={'Total': 'bedsPerThousand'})
# Calculate the total beds, and add the column
all_cases['total_beds'] = all_cases['population'] / 1000 * all_cases['bedsPerThousand']
# change abbreviations to state names
all_cases = all_cases.rename(columns={'state': 'abbrev'})
all_cases = all_cases.rename(columns={'State': 'state'})
# change location of 'state' column
cols = list(all_cases)
cols.insert(1, cols.pop(cols.index('state')))
all_cases = all_cases.loc[:, cols]
#print data frame head for spot visual data verification

all_cases
date state abbrev population positive active hospitalizedCurrently hospitalizedCumulative inIcuCurrently onVentilatorCurrently ... negativeTestsViral positiveCasesViral commercialScore negativeRegularScore negativeScore positiveScore score grade bedsPerThousand total_beds
0 2020-06-27 Alaska AK 734002 854.0 319.0 11.0 NaN NaN 1.0 ... NaN NaN 0 0 0 0 0 NaN 2.2 1614.8044
1 2020-06-27 Alabama AL 4908621 35083.0 15298.0 655.0 2697.0 NaN NaN ... NaN 34605.0 0 0 0 0 0 NaN 3.1 15216.7251
2 2020-06-27 Arkansas AR 3038999 18740.0 5707.0 284.0 1337.0 NaN 63.0 ... NaN 18740.0 0 0 0 0 0 NaN 3.2 9724.7968
3 2020-06-27 Arizona AZ 7378494 70051.0 59813.0 2577.0 4595.0 657.0 433.0 ... NaN 69641.0 0 0 0 0 0 NaN 1.9 14019.1386
4 2020-06-27 California CA 39937489 206433.0 NaN 5790.0 NaN 1562.0 NaN ... NaN 206433.0 0 0 0 0 0 NaN 1.8 71887.4802
5 2020-06-27 Colorado CO 5845526 31796.0 25693.0 226.0 5392.0 NaN NaN ... NaN 28972.0 0 0 0 0 0 NaN 1.9 11106.4994
6 2020-06-27 Connecticut CT 3563077 46206.0 33842.0 106.0 10268.0 NaN NaN ... NaN 44225.0 0 0 0 0 0 NaN 2.0 7126.1540
7 2020-06-27 District of Columbia DC 720687 10216.0 8474.0 136.0 NaN 43.0 26.0 ... NaN 10216.0 0 0 0 0 0 NaN 4.4 3171.0228
8 2020-06-27 Delaware DE 982895 11091.0 3919.0 83.0 NaN 15.0 NaN ... NaN 10047.0 0 0 0 0 0 NaN 2.2 2162.3690
9 2020-06-27 Florida FL 21992985 132545.0 NaN NaN 14432.0 NaN NaN ... 2010839.0 132545.0 0 0 0 0 0 NaN 2.6 57181.7610
10 2020-06-27 Georgia GA 10736059 74985.0 NaN 1178.0 10689.0 NaN NaN ... 721245.0 74985.0 0 0 0 0 0 NaN 2.4 25766.5416
11 2020-06-27 Hawaii HI 1412687 866.0 144.0 NaN 109.0 NaN NaN ... 85598.0 866.0 0 0 0 0 0 NaN 1.9 2684.1053
12 2020-06-27 Iowa IA 3179849 28012.0 9841.0 131.0 NaN 40.0 22.0 ... NaN 28012.0 0 0 0 0 0 NaN 3.0 9539.5470
13 2020-06-27 Idaho ID 1826156 5148.0 1231.0 NaN 309.0 NaN NaN ... NaN 4629.0 0 0 0 0 0 NaN 1.9 3469.6964
14 2020-06-27 Illinois IL 12659682 142130.0 NaN 1516.0 NaN 400.0 225.0 ... NaN 141077.0 0 0 0 0 0 NaN 2.5 31649.2050
15 2020-06-27 Indiana IN 6745354 44575.0 8234.0 595.0 6982.0 257.0 82.0 ... NaN 44575.0 0 0 0 0 0 NaN 2.7 18212.4558
16 2020-06-27 Kansas KS 2910357 13538.0 12495.0 NaN 1128.0 NaN NaN ... NaN 13538.0 0 0 0 0 0 NaN 3.3 9604.1781
17 2020-06-27 Kentucky KY 4499692 14859.0 10576.0 387.0 2589.0 74.0 NaN ... NaN 14401.0 0 0 0 0 0 NaN 3.2 14399.0144
18 2020-06-27 Louisiana LA 4645184 54769.0 11787.0 700.0 NaN NaN 73.0 ... NaN 54769.0 0 0 0 0 0 NaN 3.3 15329.1072
19 2020-06-27 Massachusetts MA 6976597 108443.0 NaN 769.0 11310.0 143.0 90.0 ... NaN 103376.0 0 0 0 0 0 NaN 2.3 16046.1731
20 2020-06-27 Maryland MD 6083116 66450.0 58358.0 478.0 10751.0 181.0 NaN ... NaN 66450.0 0 0 0 0 0 NaN 1.9 11557.9204
21 2020-06-27 Maine ME 1345790 3154.0 484.0 24.0 345.0 7.0 5.0 ... 87604.0 2809.0 0 0 0 0 0 NaN 2.5 3364.4750
22 2020-06-27 Michigan MI 10045029 69679.0 12427.0 557.0 NaN 193.0 106.0 ... 931142.0 63009.0 0 0 0 0 0 NaN 2.5 25112.5725
23 2020-06-27 Minnesota MN 5700671 35033.0 3180.0 300.0 3986.0 155.0 NaN ... NaN 35033.0 0 0 0 0 0 NaN 2.5 14251.6775
24 2020-06-27 Missouri MO 6169270 20261.0 NaN 680.0 NaN NaN 66.0 ... 399926.0 20261.0 0 0 0 0 0 NaN 3.1 19124.7370
25 2020-06-27 Mississippi MS 2989260 25531.0 7254.0 731.0 3078.0 169.0 90.0 ... NaN 25368.0 0 0 0 0 0 NaN 4.0 11957.0400
26 2020-06-27 Montana MT 1086759 852.0 226.0 9.0 95.0 NaN NaN ... NaN 852.0 0 0 0 0 0 NaN 3.3 3586.3047
27 2020-06-27 North Carolina NC 10611862 60537.0 22304.0 888.0 NaN NaN NaN ... NaN 60537.0 0 0 0 0 0 NaN 2.1 22284.9102
28 2020-06-27 North Dakota ND 761723 3458.0 252.0 23.0 225.0 NaN NaN ... NaN 3458.0 0 0 0 0 0 NaN 4.3 3275.4089
29 2020-06-27 Nebraska NE 1952570 18524.0 5560.0 125.0 1312.0 NaN NaN ... NaN 18524.0 0 0 0 0 0 NaN 3.6 7029.2520
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5851 2020-02-20 Washington WA 7797095 82.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5852 2020-02-19 Washington WA 7797095 69.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5853 2020-02-18 Washington WA 7797095 59.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5854 2020-02-17 Washington WA 7797095 50.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5855 2020-02-16 Washington WA 7797095 35.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5856 2020-02-15 Washington WA 7797095 28.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5857 2020-02-14 Washington WA 7797095 21.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5858 2020-02-13 Washington WA 7797095 18.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5859 2020-02-12 Washington WA 7797095 18.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5860 2020-02-11 Washington WA 7797095 17.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5861 2020-02-10 Washington WA 7797095 16.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5862 2020-02-09 Washington WA 7797095 13.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5863 2020-02-08 Washington WA 7797095 13.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5864 2020-02-07 Washington WA 7797095 12.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5865 2020-02-06 Washington WA 7797095 11.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5866 2020-02-05 Washington WA 7797095 8.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5867 2020-02-04 Washington WA 7797095 8.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5868 2020-02-03 Washington WA 7797095 7.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5869 2020-02-02 Washington WA 7797095 6.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5870 2020-02-01 Washington WA 7797095 4.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5871 2020-01-31 Washington WA 7797095 3.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5872 2020-01-30 Washington WA 7797095 3.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5873 2020-01-29 Washington WA 7797095 3.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5874 2020-01-28 Washington WA 7797095 2.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5875 2020-01-27 Washington WA 7797095 2.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5876 2020-01-26 Washington WA 7797095 2.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5877 2020-01-25 Washington WA 7797095 2.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5878 2020-01-24 Washington WA 7797095 2.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5879 2020-01-23 Washington WA 7797095 2.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5880 2020-01-22 Washington WA 7797095 2.0 NaN NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615

5881 rows × 29 columns

  • Load and clean JHU data
  • Merge JHU dataset with main dataset
# This cell takes some time, as it needs to connect to Kaggle Servers to retrieve data
kaggle.api.authenticate()
kaggle.api.dataset_download_files('benhamner/jhucovid19', path='./kaggle/input/jhucovid19/', unzip=True)
# Get Time-Series Data of cases as Pandas DataFrame
dir_jhu = './kaggle/input/jhucovid19/csse_covid_19_data/csse_covid_19_daily_reports'

df_list = []
for dirname, _, files in os.walk(dir_jhu):
    for file in files:
        if 'gitignore' not in file and 'README' not in file:
            full_dir = os.path.join(dirname, file)
            df_list.append(pd.read_csv(full_dir))
            
jhu_df = pd.concat(df_list, axis=0, ignore_index=True, sort=True)

# convert Last Update columns to datetime format
jhu_df.loc[:, 'Last Update'] = pd.to_datetime(jhu_df['Last Update']).apply(lambda x: x.date())
jhu_df.loc[:, 'Last_Update'] = pd.to_datetime(jhu_df['Last_Update']).apply(lambda x: x.date())

# Combine Last Update with Last_Update
jhu_df['LastUpdate'] = jhu_df['Last_Update'].combine_first(jhu_df['Last Update'])

# Combine Country/Region with Country_Region
jhu_df['CountryRegion'] = jhu_df['Country/Region'].combine_first(jhu_df['Country_Region'])

# Retrieve only US data
jhu_df = jhu_df[jhu_df['CountryRegion']=='US']

# Combine Province/State with Province_State
jhu_df['ProvinceState'] = jhu_df['Province/State'].combine_first(jhu_df['Province_State'])

# Drop unnecessary columns
jhu_df = jhu_df.drop(['Admin2', 'Lat', 'Latitude', 'Long_', 'Longitude', 'Combined_Key', 'Country/Region',
                      'Country_Region', 'Province/State', 'Province_State',
                      'Last Update', 'Last_Update', 'FIPS'], axis=1)

# Change column order
cols = list(jhu_df)
cols.insert(0, cols.pop(cols.index('CountryRegion')))
cols.insert(1, cols.pop(cols.index('ProvinceState')))
cols.insert(2, cols.pop(cols.index('LastUpdate')))
jhu_df = jhu_df.loc[:, cols]

# Change region to known US states
state_abbrs_dict = {}
for state in us.states.STATES:
    state_abbrs_dict[state.abbr] = state.name

def toState(input_state, mapping):
    abbreviation = input_state.rstrip()[-2:]
    try:
        return_value = mapping[abbreviation]
    except KeyError:
        return_value = input_state
    return return_value

jhu_df['ProvinceState'] = jhu_df['ProvinceState'].apply(lambda x: toState(x, state_abbrs_dict) if x != 'Washington, D.C.' else 'District of Columbia')

# Filter out unknown states
jhu_df = jhu_df[jhu_df['ProvinceState'].isin(all_cases.state.unique().tolist())]

# Merge-sum rows with same date and State
jhu_df = jhu_df.groupby(['LastUpdate', 'ProvinceState']).agg(
    {
        'Active': sum,
        'Confirmed': sum,
        'Deaths': sum,
        'Recovered': sum
    }
).reset_index()

jhu_df.tail()
LastUpdate ProvinceState Active Confirmed Deaths Recovered
5190 2020-06-19 Virginia 54652.0 56238.0 1586.0 0.0
5191 2020-06-19 Washington 25947.0 27192.0 1245.0 0.0
5192 2020-06-19 West Virginia 2330.0 2418.0 88.0 0.0
5193 2020-06-19 Wisconsin 23157.0 23876.0 719.0 0.0
5194 2020-06-19 Wyoming 1126.0 1144.0 18.0 0.0
# Now that we have the JHU dataset relatively cleaned
# we can go ahead and merge its data with our main dataset

for i, row in all_cases.iterrows():
    last_update = all_cases.at[i, 'date']
    state = all_cases.at[i, 'state']
    matching_row = jhu_df[jhu_df['ProvinceState'] == state]
    matching_row = matching_row[matching_row['LastUpdate'] == last_update].reset_index()

    if len(matching_row.values) > 0:
        #all_cases.at[i, 'positive'] = matching_row['Confirmed'].values[0]
        all_cases.at[i, 'active'] = matching_row['Active'].values[0]
        #all_cases.at[i, 'recovered'] = matching_row['Recovered'].values[0]   --- JHU was inconsistent, therefore removed
        #all_cases.at[i, 'death'] = matching_row['Deaths'].values[0]

    # Replace unknown recovery numbers with 0
    if np.isnan(row['recovered']):
        all_cases.at[i, 'recovered'] = 0

    if all_cases.at[i, 'active'] == 0 or np.isnan(row['active']):
        positive = all_cases.at[i, 'positive']
        recovered = all_cases.at[i, 'recovered']
        dead = all_cases.at[i, 'death']
        all_cases.at[i, 'active'] = positive - recovered - dead

#all_cases.tail()
all_cases
date state abbrev population positive active hospitalizedCurrently hospitalizedCumulative inIcuCurrently onVentilatorCurrently ... negativeTestsViral positiveCasesViral commercialScore negativeRegularScore negativeScore positiveScore score grade bedsPerThousand total_beds
0 2020-06-27 Alaska AK 734002 854.0 319.0 11.0 NaN NaN 1.0 ... NaN NaN 0 0 0 0 0 NaN 2.2 1614.8044
1 2020-06-27 Alabama AL 4908621 35083.0 15298.0 655.0 2697.0 NaN NaN ... NaN 34605.0 0 0 0 0 0 NaN 3.1 15216.7251
2 2020-06-27 Arkansas AR 3038999 18740.0 5707.0 284.0 1337.0 NaN 63.0 ... NaN 18740.0 0 0 0 0 0 NaN 3.2 9724.7968
3 2020-06-27 Arizona AZ 7378494 70051.0 59813.0 2577.0 4595.0 657.0 433.0 ... NaN 69641.0 0 0 0 0 0 NaN 1.9 14019.1386
4 2020-06-27 California CA 39937489 206433.0 200561.0 5790.0 NaN 1562.0 NaN ... NaN 206433.0 0 0 0 0 0 NaN 1.8 71887.4802
5 2020-06-27 Colorado CO 5845526 31796.0 25693.0 226.0 5392.0 NaN NaN ... NaN 28972.0 0 0 0 0 0 NaN 1.9 11106.4994
6 2020-06-27 Connecticut CT 3563077 46206.0 33842.0 106.0 10268.0 NaN NaN ... NaN 44225.0 0 0 0 0 0 NaN 2.0 7126.1540
7 2020-06-27 District of Columbia DC 720687 10216.0 8474.0 136.0 NaN 43.0 26.0 ... NaN 10216.0 0 0 0 0 0 NaN 4.4 3171.0228
8 2020-06-27 Delaware DE 982895 11091.0 3919.0 83.0 NaN 15.0 NaN ... NaN 10047.0 0 0 0 0 0 NaN 2.2 2162.3690
9 2020-06-27 Florida FL 21992985 132545.0 129056.0 NaN 14432.0 NaN NaN ... 2010839.0 132545.0 0 0 0 0 0 NaN 2.6 57181.7610
10 2020-06-27 Georgia GA 10736059 74985.0 72209.0 1178.0 10689.0 NaN NaN ... 721245.0 74985.0 0 0 0 0 0 NaN 2.4 25766.5416
11 2020-06-27 Hawaii HI 1412687 866.0 144.0 NaN 109.0 NaN NaN ... 85598.0 866.0 0 0 0 0 0 NaN 1.9 2684.1053
12 2020-06-27 Iowa IA 3179849 28012.0 9841.0 131.0 NaN 40.0 22.0 ... NaN 28012.0 0 0 0 0 0 NaN 3.0 9539.5470
13 2020-06-27 Idaho ID 1826156 5148.0 1231.0 NaN 309.0 NaN NaN ... NaN 4629.0 0 0 0 0 0 NaN 1.9 3469.6964
14 2020-06-27 Illinois IL 12659682 142130.0 135056.0 1516.0 NaN 400.0 225.0 ... NaN 141077.0 0 0 0 0 0 NaN 2.5 31649.2050
15 2020-06-27 Indiana IN 6745354 44575.0 8234.0 595.0 6982.0 257.0 82.0 ... NaN 44575.0 0 0 0 0 0 NaN 2.7 18212.4558
16 2020-06-27 Kansas KS 2910357 13538.0 12495.0 NaN 1128.0 NaN NaN ... NaN 13538.0 0 0 0 0 0 NaN 3.3 9604.1781
17 2020-06-27 Kentucky KY 4499692 14859.0 10576.0 387.0 2589.0 74.0 NaN ... NaN 14401.0 0 0 0 0 0 NaN 3.2 14399.0144
18 2020-06-27 Louisiana LA 4645184 54769.0 11787.0 700.0 NaN NaN 73.0 ... NaN 54769.0 0 0 0 0 0 NaN 3.3 15329.1072
19 2020-06-27 Massachusetts MA 6976597 108443.0 100402.0 769.0 11310.0 143.0 90.0 ... NaN 103376.0 0 0 0 0 0 NaN 2.3 16046.1731
20 2020-06-27 Maryland MD 6083116 66450.0 58358.0 478.0 10751.0 181.0 NaN ... NaN 66450.0 0 0 0 0 0 NaN 1.9 11557.9204
21 2020-06-27 Maine ME 1345790 3154.0 484.0 24.0 345.0 7.0 5.0 ... 87604.0 2809.0 0 0 0 0 0 NaN 2.5 3364.4750
22 2020-06-27 Michigan MI 10045029 69679.0 12427.0 557.0 NaN 193.0 106.0 ... 931142.0 63009.0 0 0 0 0 0 NaN 2.5 25112.5725
23 2020-06-27 Minnesota MN 5700671 35033.0 3180.0 300.0 3986.0 155.0 NaN ... NaN 35033.0 0 0 0 0 0 NaN 2.5 14251.6775
24 2020-06-27 Missouri MO 6169270 20261.0 19265.0 680.0 NaN NaN 66.0 ... 399926.0 20261.0 0 0 0 0 0 NaN 3.1 19124.7370
25 2020-06-27 Mississippi MS 2989260 25531.0 7254.0 731.0 3078.0 169.0 90.0 ... NaN 25368.0 0 0 0 0 0 NaN 4.0 11957.0400
26 2020-06-27 Montana MT 1086759 852.0 226.0 9.0 95.0 NaN NaN ... NaN 852.0 0 0 0 0 0 NaN 3.3 3586.3047
27 2020-06-27 North Carolina NC 10611862 60537.0 22304.0 888.0 NaN NaN NaN ... NaN 60537.0 0 0 0 0 0 NaN 2.1 22284.9102
28 2020-06-27 North Dakota ND 761723 3458.0 252.0 23.0 225.0 NaN NaN ... NaN 3458.0 0 0 0 0 0 NaN 4.3 3275.4089
29 2020-06-27 Nebraska NE 1952570 18524.0 5560.0 125.0 1312.0 NaN NaN ... NaN 18524.0 0 0 0 0 0 NaN 3.6 7029.2520
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5851 2020-02-20 Washington WA 7797095 82.0 82.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5852 2020-02-19 Washington WA 7797095 69.0 69.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5853 2020-02-18 Washington WA 7797095 59.0 59.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5854 2020-02-17 Washington WA 7797095 50.0 50.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5855 2020-02-16 Washington WA 7797095 35.0 35.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5856 2020-02-15 Washington WA 7797095 28.0 28.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5857 2020-02-14 Washington WA 7797095 21.0 21.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5858 2020-02-13 Washington WA 7797095 18.0 18.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5859 2020-02-12 Washington WA 7797095 18.0 18.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5860 2020-02-11 Washington WA 7797095 17.0 17.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5861 2020-02-10 Washington WA 7797095 16.0 16.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5862 2020-02-09 Washington WA 7797095 13.0 13.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5863 2020-02-08 Washington WA 7797095 13.0 13.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5864 2020-02-07 Washington WA 7797095 12.0 12.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5865 2020-02-06 Washington WA 7797095 11.0 11.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5866 2020-02-05 Washington WA 7797095 8.0 8.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5867 2020-02-04 Washington WA 7797095 8.0 8.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5868 2020-02-03 Washington WA 7797095 7.0 7.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5869 2020-02-02 Washington WA 7797095 6.0 6.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5870 2020-02-01 Washington WA 7797095 4.0 4.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5871 2020-01-31 Washington WA 7797095 3.0 3.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5872 2020-01-30 Washington WA 7797095 3.0 3.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5873 2020-01-29 Washington WA 7797095 3.0 3.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5874 2020-01-28 Washington WA 7797095 2.0 2.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5875 2020-01-27 Washington WA 7797095 2.0 2.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5876 2020-01-26 Washington WA 7797095 2.0 2.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5877 2020-01-25 Washington WA 7797095 2.0 2.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5878 2020-01-24 Washington WA 7797095 2.0 2.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5879 2020-01-23 Washington WA 7797095 2.0 2.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615
5880 2020-01-22 Washington WA 7797095 2.0 2.0 NaN NaN NaN NaN ... NaN NaN 0 0 0 0 0 NaN 1.7 13255.0615

5881 rows × 29 columns

# Save formatted dataset offline in case of disaster
dataset_file = 'results/all_cases.csv'
all_cases.to_csv(dataset_file)
# convert date to datetime format
all_cases['date'] = pd.to_datetime(all_cases['date'])

An Exploratory data analysis of the US dataset

Basic triad of the dataset: validating data types and data integrity of each row

dataset_file = 'results/all_cases.csv'
covid_df = pd.read_csv(dataset_file, index_col=0) 
# convert date to datetime format
covid_df['date'] = pd.to_datetime(covid_df['date'])
covid_df.info()
# set float format to 3 decimals
pd.set_option('display.float_format', lambda x: '%.3f' % x)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5881 entries, 0 to 5880
Data columns (total 29 columns):
date                      5881 non-null datetime64[ns]
state                     5881 non-null object
abbrev                    5881 non-null object
population                5881 non-null int64
positive                  5881 non-null float64
active                    5881 non-null float64
hospitalizedCurrently     3598 non-null float64
hospitalizedCumulative    3198 non-null float64
inIcuCurrently            1858 non-null float64
onVentilatorCurrently     1653 non-null float64
recovered                 5881 non-null float64
dataQualityGrade          4947 non-null object
lastUpdateEt              5526 non-null object
dateModified              5526 non-null object
checkTimeEt               5526 non-null object
death                     5881 non-null float64
hospitalized              3198 non-null float64
totalTestsViral           1568 non-null float64
positiveTestsViral        525 non-null float64
negativeTestsViral        524 non-null float64
positiveCasesViral        3059 non-null float64
commercialScore           5881 non-null int64
negativeRegularScore      5881 non-null int64
negativeScore             5881 non-null int64
positiveScore             5881 non-null int64
score                     5881 non-null int64
grade                     0 non-null float64
bedsPerThousand           5881 non-null float64
total_beds                5881 non-null float64
dtypes: datetime64[ns](1), float64(16), int64(6), object(6)
memory usage: 1.3+ MB
covid_df.head()
date state abbrev population positive active hospitalizedCurrently hospitalizedCumulative inIcuCurrently onVentilatorCurrently ... negativeTestsViral positiveCasesViral commercialScore negativeRegularScore negativeScore positiveScore score grade bedsPerThousand total_beds
0 2020-06-27 Alaska AK 734002 854.000 319.000 11.000 nan nan 1.000 ... nan nan 0 0 0 0 0 nan 2.200 1614.804
1 2020-06-27 Alabama AL 4908621 35083.000 15298.000 655.000 2697.000 nan nan ... nan 34605.000 0 0 0 0 0 nan 3.100 15216.725
2 2020-06-27 Arkansas AR 3038999 18740.000 5707.000 284.000 1337.000 nan 63.000 ... nan 18740.000 0 0 0 0 0 nan 3.200 9724.797
3 2020-06-27 Arizona AZ 7378494 70051.000 59813.000 2577.000 4595.000 657.000 433.000 ... nan 69641.000 0 0 0 0 0 nan 1.900 14019.139
4 2020-06-27 California CA 39937489 206433.000 200561.000 5790.000 nan 1562.000 nan ... nan 206433.000 0 0 0 0 0 nan 1.800 71887.480

5 rows × 29 columns

The NaN values may indicate that there were no to few Covid-19 patients at these date points. We further analyse the statistical values of the dataset columns to ensure data integrity and accuracy.

covid_df.describe()
# TODO rounding up the numbers
population positive active hospitalizedCurrently hospitalizedCumulative inIcuCurrently onVentilatorCurrently recovered death hospitalized ... negativeTestsViral positiveCasesViral commercialScore negativeRegularScore negativeScore positiveScore score grade bedsPerThousand total_beds
count 5881.000 5881.000 5881.000 3598.000 3198.000 1858.000 1653.000 5881.000 5881.000 3198.000 ... 524.000 3059.000 5881.000 5881.000 5881.000 5881.000 5881.000 0.000 5881.000 5881.000
mean 6543367.585 20916.359 18615.094 1028.274 4343.898 444.164 226.540 4396.344 1091.030 4343.898 ... 288695.632 31980.345 0.000 0.000 0.000 0.000 0.000 nan 2.625 15806.903
std 7387134.109 46506.152 41859.038 1934.647 12924.575 695.599 330.534 10915.396 2906.033 12924.575 ... 383203.556 56469.631 0.000 0.000 0.000 0.000 0.000 nan 0.744 16159.795
min 567025.000 0.000 0.000 1.000 0.000 2.000 0.000 0.000 0.000 0.000 ... 17.000 0.000 0.000 0.000 0.000 0.000 0.000 nan 1.600 1318.928
25% 1778070.000 631.000 543.000 121.000 219.250 82.000 36.000 0.000 12.000 219.250 ... 49528.250 5009.500 0.000 0.000 0.000 0.000 0.000 nan 2.100 3773.952
50% 4499692.000 5025.000 4494.000 402.000 954.000 185.500 94.000 206.000 144.000 954.000 ... 138808.000 13637.000 0.000 0.000 0.000 0.000 0.000 nan 2.500 11557.920
75% 7797095.000 20595.000 17382.000 1037.750 3215.750 487.000 250.000 3064.000 772.000 3215.750 ... 354942.750 35269.500 0.000 0.000 0.000 0.000 0.000 nan 3.100 19124.737
max 39937489.000 391923.000 356899.000 18825.000 89995.000 5225.000 2425.000 78248.000 24830.000 89995.000 ... 2010839.000 391923.000 0.000 0.000 0.000 0.000 0.000 nan 4.800 71887.480

8 rows × 22 columns

# drop unnecessary columns
covid_cleaned = covid_df.drop(['hospitalized', 'bedsPerThousand'], axis=1)
covid_100k = covid_cleaned.copy()
# list of columns to transform to per 100k
columns_list = ['positive', 'active', 'recovered', 'death', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'onVentilatorCurrently', 'total_beds', 'totalTestsViral']
# add columns per 100k
for column in columns_list:
    if column == 'total_beds':
        covid_100k['BedsPer100k'.format(column)] = (covid_cleaned.loc[:, column] / covid_cleaned.loc[:, 'population']) * 100000
    else:
        covid_100k['{}_100k'.format(column)] = (covid_cleaned.loc[:, column] / covid_cleaned.loc[:, 'population']) * 100000

covid_100k = covid_100k.drop(columns_list, axis=1)
covid_100k['date'] = pd.to_datetime(covid_100k['date'])
start_date = '2020-04-18'
end_date = '2020-05-19'
mask = (covid_100k['date'] > start_date) & (covid_100k['date'] <= end_date)
covid_100k_last_month = covid_100k.loc[mask]
covid_100k_last_month_part1 =  covid_100k_last_month.groupby('date').sum().loc[:, ['positive_100k','active_100k','recovered_100k','death_100k','hospitalizedCumulative_100k']].diff(periods=1, axis=0)

covid_100k_last_month_part2 = covid_100k_last_month.groupby('date').sum().loc[:, ['inIcuCurrently_100k','onVentilatorCurrently_100k','BedsPer100k']]

final_100k_last_month = covid_100k_last_month_part1.merge(covid_100k_last_month_part2, left_index=True, right_index=True)
final_100k_last_month.head()
positive_100k active_100k recovered_100k death_100k hospitalizedCumulative_100k inIcuCurrently_100k onVentilatorCurrently_100k BedsPer100k
date
2020-04-19 nan nan nan nan nan 153.528 80.717 13440.000
2020-04-20 413.759 391.692 35.481 25.728 22.652 156.581 79.710 13440.000
2020-04-21 387.394 360.446 65.218 30.520 31.446 166.081 78.603 13440.000
2020-04-22 428.601 989.954 412.625 28.780 36.181 167.561 78.032 13440.000
2020-04-23 452.031 -2213.482 72.921 26.282 28.842 166.277 94.521 13440.000
final_100k_last_month.describe()
positive_100k active_100k recovered_100k death_100k hospitalizedCumulative_100k inIcuCurrently_100k onVentilatorCurrently_100k BedsPer100k
count 30.000 30.000 30.000 30.000 30.000 31.000 31.000 31.000
mean 399.188 364.943 147.172 23.271 39.160 134.117 73.503 13440.000
std 58.939 634.169 81.341 5.781 43.524 19.860 8.141 0.000
min 287.019 -2213.482 35.481 13.315 9.507 109.602 61.622 13440.000
25% 348.980 314.204 80.563 18.439 22.991 118.222 66.261 13440.000
50% 405.026 366.234 127.774 24.119 28.295 127.613 74.706 13440.000
75% 432.647 419.664 212.491 26.201 32.754 149.768 79.157 13440.000
max 544.349 2291.210 412.625 33.917 246.371 167.561 94.521 13440.000
# save description cleaned dataset to csv
describe_file = 'results/final_100k_last_month.csv'
final_100k_last_month.describe().to_csv(describe_file)

Graphical Exploratory Analysis

Plotting histograms, scatterplots and boxplots to assess the distribution of the entire US dataset.

# Omitting the categorical (states/abbreviations) and time columns 
# There must be an easier way for you, but this was the easiest way I could think of
covid_cleaned['date'] = pd.to_datetime(covid_cleaned['date'])
# mask data for last month
start_date = '2020-04-18'
end_date = '2020-05-19'
mask = (covid_cleaned['date'] > start_date) & (covid_cleaned['date'] <= end_date)
covid_cleaned_last_month = covid_cleaned.loc[mask]
plot_df = covid_cleaned_last_month[['population', 'active', 'recovered', 'death', 'hospitalizedCurrently', 'inIcuCurrently', 'onVentilatorCurrently', 'total_beds']]
plot_df_last_month = covid_100k_last_month[['population', 'active_100k', 'recovered_100k', 'death_100k', 'hospitalizedCurrently_100k', 'inIcuCurrently_100k', 'onVentilatorCurrently_100k', 'BedsPer100k']]
timeseries_usa_df = covid_100k.loc[:, ['date', 'positive_100k', 'active_100k', 'recovered_100k', 'death_100k', 'hospitalizedCurrently_100k', 'inIcuCurrently_100k', 'onVentilatorCurrently_100k', 'BedsPer100k']].groupby('date').sum().reset_index()
# timeseries_usa_df['log_positive'] = np.log(timeseries_usa_df['positive_100k'])
# timeseries_usa_df['log_active'] = np.log(timeseries_usa_df['active_100k'])
# timeseries_usa_df['log_recovered'] = np.log(timeseries_usa_df['recovered_100k'])
# timeseries_usa_df['log_death'] = np.log(timeseries_usa_df['death_100k'])
timeseries_usa_df.tail()
date positive_100k active_100k recovered_100k death_100k hospitalizedCurrently_100k inIcuCurrently_100k onVentilatorCurrently_100k BedsPer100k
153 2020-06-23 32860.514 19133.883 12183.722 1542.909 403.069 71.078 37.684 13440.000
154 2020-06-24 33315.285 19401.954 12359.391 1553.940 408.570 68.612 36.820 13440.000
155 2020-06-25 33812.912 19730.969 12498.864 1583.079 414.087 67.864 36.962 13440.000
156 2020-06-26 34335.924 20098.997 12643.998 1592.929 404.115 67.051 34.318 13440.000
157 2020-06-27 34829.638 20417.559 12812.241 1599.839 407.257 68.533 35.118 13440.000
covid_df.head()
date state abbrev population positive active hospitalizedCurrently hospitalizedCumulative inIcuCurrently onVentilatorCurrently ... negativeTestsViral positiveCasesViral commercialScore negativeRegularScore negativeScore positiveScore score grade bedsPerThousand total_beds
0 2020-06-27 Alaska AK 734002 854.000 319.000 11.000 nan nan 1.000 ... nan nan 0 0 0 0 0 nan 2.200 1614.804
1 2020-06-27 Alabama AL 4908621 35083.000 15298.000 655.000 2697.000 nan nan ... nan 34605.000 0 0 0 0 0 nan 3.100 15216.725
2 2020-06-27 Arkansas AR 3038999 18740.000 5707.000 284.000 1337.000 nan 63.000 ... nan 18740.000 0 0 0 0 0 nan 3.200 9724.797
3 2020-06-27 Arizona AZ 7378494 70051.000 59813.000 2577.000 4595.000 657.000 433.000 ... nan 69641.000 0 0 0 0 0 nan 1.900 14019.139
4 2020-06-27 California CA 39937489 206433.000 200561.000 5790.000 nan 1562.000 nan ... nan 206433.000 0 0 0 0 0 nan 1.800 71887.480

5 rows × 29 columns

# get data from last day
# plot_df_last_date = plot_df.loc[covid_df['date'] == '2020-05-18'] 

# Plotting histograms to gain insight of the distribution shape, skewness and scale
fig, axs = plt.subplots(4,2,figsize = (16, 16))
sns.set()
for i, column in enumerate(plot_df_last_month.columns):
    if (i + 1) % 2 == 0:
        ax = axs[(i//2), 1]
    else:
        ax = axs[(i//2), 0]
    sns.distplot(plot_df_last_month[column], fit=norm, fit_kws=dict(label='normality'), hist_kws=dict(color='plum', edgecolor='k', linewidth=1, label='frequency'), ax=ax, color='#9d53ad')
    ax.legend(loc='upper right')
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Looking at linearity and variance with scatterplots
# Removing the target variable and saving it in another df
target = plot_df.hospitalizedCurrently
indep_var = plot_df.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var.columns):
    ax=fig.add_subplot(4, 3, i+1) 
    sns.regplot(x=indep_var[col], y=target, data=indep_var, label=col, scatter_kws={'s':10}, line_kws={"color": "plum", 'label': 'hospitCurr'})
    plt.suptitle('Scatterplots with Target Hospitalized Patients Showing Growth Trajectories', fontsize=18)
    plt.legend()
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Assessing the normality of the distribution with a boxplot
# Boxplot with removed outliers
fig, ax = plt.subplots(figsize = (16, 12))
for i, col in enumerate(plot_df.columns):
    ax=fig.add_subplot(4, 3, i+1) 
    sns.boxplot(x=plot_df[col], data=plot_df, color='lightblue', showfliers=False)
    plt.suptitle('Boxplots of Independent Variables', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# get data from last day
plot_df_last_date = plot_df.loc[covid_df['date'] == '2020-05-18'] 

fig, ax = plt.subplots(figsize = (16, 12))
for i, col in enumerate(plot_df_last_date.columns):
    ax=fig.add_subplot(4, 3, i+1) 
    sns.boxplot(x=plot_df_last_date[col], data=plot_df, color='lightblue', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)

Analysis of Hospitalizations, ICU Hospitilizations, Active Cases, New Cases, Deaths, and Recoveries by State

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
C:\Users\Doctor Gomez\AppData\Roaming\Python\Python37\site-packages\pandas\plotting\_converter.py:129: FutureWarning:

Using an implicitly registered datetime converter for a matplotlib plotting method. The converter was registered by pandas on import. Future versions of pandas will require you to explicitly register matplotlib converters.

To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()

Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

Alaska

# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in AK Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Patients in AK Currently on a Ventilator', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.recovered, linewidth=3.3)
plt.title('Number of Recovered Cases in AK', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.totalTestsViral, linewidth=3.3)
plt.title('Number of Cumulative Deaths in AK', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Split covid_df into subset with only NY values
alaska = covid_df.loc[covid_df['abbrev'] == 'AK'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(alaska.date, alaska.death, linewidth=3.3)
plt.title('Number of Cumulative Viral Tests in AK', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots AK
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables AK', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of AK
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables AK', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDALASKA

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
Text(0, 0.5, 'No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
Text(0, 0.5, 'No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
Text(0, 0.5, 'No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

New York

# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCurrently, linewidth=3.3)
plt.title('Number of Patients in NY Currently Hospitalized', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Patients')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.inIcuCurrently, linewidth=3.3)
plt.title('Number of Active ICU Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.onVentilatorCurrently, linewidth=3.3)
plt.title('Number of Active Ventilator Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.hospitalizedCumulative, linewidth=3.3)
plt.title('Number of Cumulative Hospitilizations in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.positive, linewidth=3.3)
plt.title('Number of Cumulative Positive Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.recovered, linewidth=3.3)
plt.title('Number of Cummulative Recovered Cases in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Victims')
# Split covid_df into subset with only NY values
new_york = covid_df.loc[covid_df['abbrev'] == 'NY'] 
fig, ax = plt.subplots(figsize = (16, 12))
# Timeseries plt
plt.plot(new_york.date, new_york.death, linewidth=3.3)
plt.title('Number of Cumulative Deaths in NY', fontsize=23)
plt.xlabel('Date')
plt.ylabel('No. Postives')



# Omit the categorical and date cols 
new_york = new_york[['positive', 'active', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'recovered', 'death', 'hospitalized']]
# Scatter plots NY
# Split dependent var from independent variables
target_ny = new_york.hospitalizedCurrently
indep_var_ny = new_york.drop(columns=['hospitalizedCurrently'])

fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(indep_var_ny.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.regplot(x=indep_var_ny[col], y=target_ny, data=indep_var_ny, label=col, scatter_kws={'s':10}, line_kws={"color": "plum"})
    plt.suptitle('Distributions of Independent Variables NY', fontsize=23)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
# Boxplot of NY
fig, ax = plt.subplots(figsize = (16, 16))
for i, col in enumerate(new_york.columns):
    ax=fig.add_subplot(3, 3, i+1) 
    sns.boxplot(x=new_york[col], data=new_york, color='lightpink', showfliers=True)
    plt.suptitle('Boxplots of Independent Variables NY', fontsize=18)
plt.tight_layout()
fig.subplots_adjust(top=0.95)
###ENDNEWYORK

Assessing Correlation of Independent Variables

# TODO add some explanation / look more into collinear variables
# Heatmap of correlations
# Save correlations to variable
corr = covid_cleaned.corr(method='pearson')
# We can create a mask to not show duplicate values
mask = np.triu(np.ones_like(corr, dtype=np.bool))
# Set up the matplotlib figure
fig, ax = plt.subplots(figsize=(16,16))

# Generate heatmap
sns.heatmap(corr, annot=True, mask=mask, cmap='GnBu', center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})

Build model for dependent Variable

  • To be used to predict current hospitalizations
  • Having more complete variables for in ICU currently and on Ventilator Currently will allow us to predict these numbers as well.
# We compare three models:
# - Polynomial Regression
# - Linear Regression
# - ElasticNet

# Copy DFs to not mess up original one
# We will use model_df for our regression model
model_df = all_cases.copy()

# Delete redundant rows
for row in ['abbrev', 'bedsPerThousand', 'hospitalized', 
'state', 'hospitalizedCumulative', 'dataQualityGrade', 'lastUpdateEt']:
    del model_df[row]

# Drop NaN values for hospitalizedCurrently
model_df = model_df.dropna(subset=['hospitalizedCurrently'])

# Drop Values with abnormal active-hospitalised ratios (outside Conf. Interval)
model_df['ratio_hospital'] = model_df['hospitalizedCurrently'] / model_df['active']
model_df = model_df[~(model_df['ratio_hospital'] >= model_df.ratio_hospital.quantile(0.99))]

#model_df = model_df[~(model_df['ratio_hospital'] <= model_df['ratio_hospital'].median())]
del model_df['ratio_hospital']

# Get peek of model to use
model_df.describe()
#Printing data for visual verification.
all_cases
### Mark Bee (https://www.facebook.com/markbeenyc) - do you need a sippy cup lesson on this information?